## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.8
## ✓ tidyr 1.2.0 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'dplyr' was built under R version 4.0.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
## Joining, by = c("item_id", "item", "condition", "first_mention",
## "recent_mention", "knowledge_cue", "start", "end")
MTurk slightly oversampled to 1161. Only 13 ppts indicated they are not native English speakers.
| Reason | Removed | (%) |
|---|---|---|
| ex.native_eng | 13 | 1.1 |
| ex.runtime | 0 | 0.0 |
| —— | NA | NA |
| Total Removed | 13 | 1.1 |
| Retained | 1143 | 98.9 |
55% of ppts (630/1143) passed both attention checks.
# Distribution of per-participant attention-check accuracy.
# BUG FIX: the old denominator was nrow(attention) -- the number of attention
# *trials* (two per participant) -- so the reported props summed to 0.5
# instead of 1. After collapsing to one row per participant, the proportion
# must be taken over participants, i.e. n / sum(n).
attention %>%
group_by(participant_id) %>%
summarize(accuracy = mean(accuracy), .groups = "drop") %>%
count(accuracy, name = "n") %>%
mutate(
prop = round(n / sum(n), 2)
)| accuracy | n | prop |
|---|---|---|
| 0.0 | 297 | 0.13 |
| 0.5 | 216 | 0.09 |
| 1.0 | 630 | 0.28 |
# Of the incorrect attention responses, what fraction named a start/end
# location. FIXES: use FALSE rather than the reassignable alias F, and do
# not re-filter the whole data frame a second time inside summarize() just
# to get the denominator -- sum(n) over the counted groups is the same total.
attention %>%
filter(is_correct == FALSE) %>%
count(is_start_or_end, name = "n") %>%
mutate(
prop = round(n / sum(n), 2)
)| is_start_or_end | n | prop |
|---|---|---|
| FALSE | 606 | 0.75 |
| TRUE | 204 | 0.25 |
Item accuracy looks normal.
# Per-question attention-check accuracy: one bar per item x question pair,
# ordered from most to least accurate, to spot problematic questions.
attention %>%
mutate(
item_question_id = paste0(item, "_", question_id)
) %>%
group_by(item_question_id) %>%
summarize(accuracy = mean(accuracy), .groups="drop") %>%
ggplot(aes(x = accuracy, y=reorder(item_question_id, -accuracy))) +
stat_summary(fun="mean", geom="bar")The distribution by item looks fairly normal but does lead to some extreme cases (e.g. no cases for top left, TB, 12).
# Cell balance for retained (attention-passing) critical trials: per-item
# counts, then per-knowledge-cue counts, split by condition and the
# first/recent-mention factors. FIX: FALSE instead of the reassignable alias F.
critical %>%
filter(excluded.attention == FALSE) %>%
ggplot(aes(x = factor(item), fill=condition)) +
geom_bar(stat="count", position = "dodge") +
facet_grid(cols=vars(first_mention), rows=vars(recent_mention),
labeller = "label_both")critical %>%
filter(excluded.attention == FALSE) %>%
ggplot(aes(x = knowledge_cue, fill=condition)) +
geom_bar(stat="count", position = "dodge") +
facet_grid(cols=vars(first_mention), rows=vars(recent_mention),
labeller = "label_both")Accuracy is 80% for ppts who passed the attention checks (and 25% for those who didn’t).
# Mean critical-trial accuracy for retained vs excluded participants
# (bar plot), plus the underlying means and counts.
critical %>%
ggplot(aes(x = excluded.attention, y = accuracy, fill=excluded.attention)) +
stat_summary(fun="mean", geom="bar") +
scale_fill_manual(values=c("#009933", "#FF0000"))critical %>%
group_by(excluded.attention) %>%
summarize(accuracy=mean(accuracy), n=n(), .groups="drop")| excluded.attention | accuracy | n |
|---|---|---|
| FALSE | 0.8047619 | 630 |
| TRUE | 0.2553606 | 513 |
# For retained participants: accuracy by condition (plot), then the rate of
# choosing the START location among trials answered with start or end.
# FIX: FALSE instead of the reassignable alias F.
critical %>%
filter(excluded.attention == FALSE) %>%
ggplot(aes(x = condition, y = accuracy, fill=condition)) +
stat_summary(fun="mean", geom="bar")critical %>%
filter(excluded.attention == FALSE,
is_start | is_end) %>%
group_by(condition) %>%
summarize(start=mean(is_start), n=n(), .groups="drop")| condition | start | n |
|---|---|---|
| False Belief | 0.8964401 | 309 |
| True Belief | 0.2434211 | 304 |
None of the items look particularly easy/hard.
# Item difficulty check: mean accuracy per item, ordered hardest-first,
# faceted by whether the participant was excluded on attention checks.
critical %>%
ggplot(aes(x = reorder(item, -accuracy), y = accuracy, color=excluded.attention)) +
stat_summary(fun="mean", geom="point") +
facet_grid(cols=vars(excluded.attention), labeller=label_both) +
scale_color_manual(values=c("#009933", "#FF0000"))The incorrect answers from retained ppts mostly look like genuine mistakes.
# Raw listing of incorrect responses from retained participants, for manual
# inspection. FIX: FALSE instead of F, consistent with the next line.
critical %>%
filter(is_correct == FALSE,
excluded.attention == FALSE) %>%
select(participant_id, item_id, correct_answer, response, is_correct) %>%
arrange(item_id)| participant_id | item_id | correct_answer | response | is_correct |
|---|---|---|---|---|
| 1758 | 1_fb_1_e_s_ex | box | room | FALSE |
| 1893 | 1_fb_1_e_s_im | box | box but finds it is missing | FALSE |
| 415 | 1_tb_1_e_e_ex | basket | room | FALSE |
| 1411 | 1_tb_1_e_e_ex | basket | box | FALSE |
| 669 | 1_tb_1_e_s_ex | basket | box | FALSE |
| 1134 | 1_tb_1_e_s_ex | basket | box | FALSE |
| 1853 | 1_tb_1_s_s_im | basket | box | FALSE |
| 150 | 10_fb_1_e_e_im | toolbox | van | FALSE |
| 1969 | 10_fb_1_e_e_im | toolbox | van | FALSE |
| 314 | 10_fb_1_s_e_im | toolbox | van | FALSE |
| 645 | 10_fb_1_s_e_im | toolbox | van | FALSE |
| 1153 | 10_fb_1_s_e_im | toolbox | van | FALSE |
| 1409 | 10_fb_1_s_e_im | toolbox | van | FALSE |
| 1672 | 10_fb_1_s_s_im | toolbox | van | FALSE |
| 885 | 10_tb_1_e_e_ex | van | toolbox | FALSE |
| 640 | 10_tb_1_e_s_ex | van | toolbox | FALSE |
| 1425 | 10_tb_1_e_s_ex | van | toolbox | FALSE |
| 1580 | 10_tb_1_e_s_ex | van | toolbox | FALSE |
| 1844 | 10_tb_1_e_s_im | van | toolbox | FALSE |
| 914 | 10_tb_1_s_e_im | van | toolbox | FALSE |
| 1972 | 10_tb_1_s_e_im | van | toolbox | FALSE |
| 1641 | 11_fb_1_e_s_im | suitcase | backpack | FALSE |
| 1817 | 11_fb_1_s_e_im | suitcase | backpack | FALSE |
| 902 | 11_tb_1_s_e_ex | backpack | suitcase | FALSE |
| 1967 | 11_tb_1_s_e_im | backpack | suitcase | FALSE |
| 612 | 11_tb_1_s_s_ex | backpack | suitcase | FALSE |
| 1302 | 11_tb_1_s_s_im | backpack | suitcase | FALSE |
| 1423 | 11_tb_1_s_s_im | backpack | suitcase | FALSE |
| 1009 | 12_fb_1_e_s_im | stable | hut | FALSE |
| 1219 | 12_fb_1_s_s_im | stable | hut | FALSE |
| 1269 | 12_tb_1_e_s_im | hut | stable | FALSE |
| 209 | 12_tb_1_s_e_ex | hut | stable | FALSE |
| 520 | 12_tb_1_s_e_ex | hut | stable | FALSE |
| 1491 | 12_tb_1_s_e_ex | hut | stable | FALSE |
| 1837 | 12_tb_1_s_e_ex | hut | stable | FALSE |
| 1777 | 12_tb_1_s_s_ex | hut | stable | FALSE |
| 1835 | 2_fb_1_e_e_im | cupboard | sandwich | FALSE |
| 1839 | 2_fb_1_e_s_im | cupboard | fridge | FALSE |
| 339 | 2_tb_1_e_s_im | fridge | cupboard | FALSE |
| 112 | 2_tb_1_s_e_ex | fridge | cupboard | FALSE |
| 1528 | 2_tb_1_s_s_ex | fridge | cupboard | FALSE |
| 1963 | 2_tb_1_s_s_ex | fridge | cupboard | FALSE |
| 861 | 2_tb_1_s_s_im | fridge | cupboard | FALSE |
| 926 | 2_tb_1_s_s_im | fridge | cupboard | FALSE |
| 1804 | 2_tb_1_s_s_im | fridge | cupboard | FALSE |
| 159 | 3_fb_1_e_e_im | sink | basket | FALSE |
| 704 | 3_fb_1_s_s_ex | sink | stain | FALSE |
| 1416 | 3_tb_1_e_e_ex | basket | sink | FALSE |
| 247 | 3_tb_1_e_e_im | basket | sink | FALSE |
| 225 | 3_tb_1_e_s_ex | basket | sink | FALSE |
| 372 | 3_tb_1_s_s_ex | basket | sink | FALSE |
| 1298 | 3_tb_1_s_s_ex | basket | stain | FALSE |
| 1505 | 3_tb_1_s_s_im | basket | sink | FALSE |
| 1101 | 4_fb_1_e_e_im | shed | garage | FALSE |
| 518 | 4_fb_1_e_s_im | shed | garage | FALSE |
| 1947 | 4_fb_1_s_e_im | shed | garage | FALSE |
| 342 | 4_tb_1_e_e_ex | garage | shed | FALSE |
| 174 | 4_tb_1_e_e_im | garage | shed | FALSE |
| 408 | 4_tb_1_e_s_ex | garage | shed | FALSE |
| 349 | 4_tb_1_e_s_ex | garage | shed | FALSE |
| 1677 | 4_tb_1_e_s_ex | garage | shed | FALSE |
| 1417 | 4_tb_1_s_s_ex | garage | shed | FALSE |
| 2021 | 4_tb_1_s_s_ex | garage | shed | FALSE |
| 591 | 4_tb_1_s_s_im | garage | yard | FALSE |
| 2019 | 4_tb_1_s_s_im | garage | shed | FALSE |
| 371 | 5_fb_1_e_e_im | hall | study | FALSE |
| 366 | 5_fb_1_s_e_im | hall | bathroom | FALSE |
| 1464 | 5_fb_1_s_e_im | hall | study | FALSE |
| 121 | 5_tb_1_e_e_im | study | hall | FALSE |
| 1590 | 5_tb_1_e_s_ex | study | hall | FALSE |
| 1819 | 5_tb_1_e_s_ex | study | hall | FALSE |
| 309 | 5_tb_1_s_e_im | study | hall | FALSE |
| 1892 | 5_tb_1_s_s_ex | study | hall | FALSE |
| 1720 | 5_tb_1_s_s_im | study | hall | FALSE |
| 957 | 6_fb_1_s_e_ex | drawer | cabinet | FALSE |
| 1264 | 6_fb_1_s_e_ex | drawer | cabinet | FALSE |
| 258 | 6_fb_1_s_s_ex | drawer | ross | FALSE |
| 1930 | 6_fb_1_s_s_im | drawer | ross | FALSE |
| 839 | 6_tb_1_e_e_ex | cabinet | drawer | FALSE |
| 555 | 6_tb_1_e_e_im | cabinet | ross wanders | FALSE |
| 1684 | 6_tb_1_e_s_im | cabinet | drawer | FALSE |
| 178 | 6_tb_1_s_e_ex | cabinet | drawer | FALSE |
| 1077 | 6_tb_1_s_e_ex | cabinet | drawer | FALSE |
| 717 | 6_tb_1_s_e_im | cabinet | drawer | FALSE |
| 1018 | 6_tb_1_s_s_ex | cabinet | drawer | FALSE |
| 1376 | 7_fb_1_e_s_im | garage | fridge | FALSE |
| 930 | 7_fb_1_s_e_ex | garage | fridge | FALSE |
| 1667 | 7_fb_1_s_e_im | garage | fridge | FALSE |
| 183 | 7_fb_1_s_s_im | garage | fridge | FALSE |
| 1371 | 7_fb_1_s_s_im | garage | fridge | FALSE |
| 1063 | 7_tb_1_e_e_ex | fridge | garage | FALSE |
| 1253 | 7_tb_1_e_s_ex | fridge | garage | FALSE |
| 1809 | 7_tb_1_s_e_ex | fridge | garage | FALSE |
| 1928 | 7_tb_1_s_s_ex | fridge | garaage | FALSE |
| 895 | 8_fb_1_s_e_im | hall | bedroom | FALSE |
| 399 | 8_fb_1_s_s_ex | hall | bedroom | FALSE |
| 660 | 8_tb_1_e_e_ex | bedroom | good | FALSE |
| 1739 | 8_tb_1_e_e_ex | bedroom | hall | FALSE |
| 730 | 8_tb_1_e_e_im | bedroom | hall | FALSE |
| 1000 | 8_tb_1_e_e_im | bedroom | hall | FALSE |
| 941 | 8_tb_1_e_s_ex | bedroom | hall | FALSE |
| 1035 | 8_tb_1_e_s_ex | bedroom | hall | FALSE |
| 147 | 8_tb_1_s_e_ex | bedroom | hall | FALSE |
| 123 | 8_tb_1_s_e_im | bedroom | hall | FALSE |
| 574 | 8_tb_1_s_e_im | bedroom | hall | FALSE |
| 920 | 8_tb_1_s_e_im | bedroom | garden | FALSE |
| 1568 | 8_tb_1_s_s_ex | bedroom | hall | FALSE |
| 1833 | 8_tb_1_s_s_ex | bedroom | hall | FALSE |
| 1933 | 8_tb_1_s_s_ex | bedroom | hall | FALSE |
| 1806 | 9_fb_1_e_e_ex | cupboard | drawer | FALSE |
| 890 | 9_fb_1_e_e_im | cupboard | drawer | FALSE |
| 1828 | 9_fb_1_e_s_ex | cupboard | drawer | FALSE |
| 1873 | 9_fb_1_e_s_ex | cupboard | cabinet | FALSE |
| 1082 | 9_fb_1_s_e_im | cupboard | drawer | FALSE |
| 1245 | 9_fb_1_s_e_im | cupboard | drawer | FALSE |
| 566 | 9_tb_1_e_e_ex | drawer | cupboard | FALSE |
| 1715 | 9_tb_1_e_s_ex | drawer | cupboard | FALSE |
| 1124 | 9_tb_1_e_s_im | drawer | cupboard | FALSE |
| 844 | 9_tb_1_s_e_ex | drawer | cupboard | FALSE |
| 1235 | 9_tb_1_s_e_ex | drawer | cubpboard | FALSE |
| 846 | 9_tb_1_s_e_im | drawer | cupboard | FALSE |
| 1808 | 9_tb_1_s_s_ex | drawer | cupboard | FALSE |
| 230 | 9_tb_1_s_s_im | drawer | kitchen | FALSE |
First mention shows a noticeable effect. Effects of other vars look small.
# Marginal effects of condition, knowledge cue, first mention, and recent
# mention on accuracy / P(start), each plotted for retained (green) vs
# excluded (red) participants; bars are means with bootstrapped CIs
# (mean_cl_boot). The final chunk plots per-participant attention accuracy
# against mean passage reading time (log x-axis) with a linear fit.
critical %>%
ggplot(aes(x = condition, y = accuracy, fill=excluded.attention)) +
stat_summary(fun="mean", geom="bar", position="dodge", alpha=0.8) +
stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(0.9), width=0.2) +
scale_fill_manual(values=c("#009933", "#FF0000"))critical %>%
ggplot(aes(x = knowledge_cue, y = accuracy, fill=excluded.attention)) +
stat_summary(fun="mean", geom="bar", position="dodge", alpha=0.8) +
stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(0.9), width=0.2) +
scale_fill_manual(values=c("#009933", "#FF0000"))critical %>%
ggplot(aes(x = first_mention, y = accuracy, fill=excluded.attention)) +
stat_summary(fun="mean", geom="bar", position="dodge", alpha=0.8) +
stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(0.9), width=0.2) +
scale_fill_manual(values=c("#009933", "#FF0000"))critical %>%
mutate(start = ifelse(is_start, 1, 0)) %>%
ggplot(aes(x = first_mention, y = start, fill=excluded.attention)) +
stat_summary(fun="mean", geom="bar", position="dodge", alpha=0.8) +
stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(0.9), width=0.2) +
scale_fill_manual(values=c("#009933", "#FF0000"))critical %>%
ggplot(aes(x = recent_mention, y = accuracy, fill=excluded.attention)) +
stat_summary(fun="mean", geom="bar", position="dodge", alpha=0.8) +
stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(0.9), width=0.2) +
scale_fill_manual(values=c("#009933", "#FF0000"))critical %>%
mutate(start = ifelse(is_start, 1, 0)) %>%
ggplot(aes(x = recent_mention, y = start, fill=excluded.attention)) +
stat_summary(fun="mean", geom="bar", position="dodge", alpha=0.8) +
stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(0.9), width=0.2) +
scale_fill_manual(values=c("#009933", "#FF0000"))attention %>%
group_by(participant_id) %>%
summarize(
passage_reading_time = mean(passage_reading_time),
accuracy = mean(accuracy),
.groups="drop"
) %>%
ggplot(aes(x = passage_reading_time, y = accuracy)) +
# geom_point() +
stat_summary_bin(fun.data = mean_cl_boot, geom="pointrange", binwidth = 0.1) +
scale_x_log10() +
geom_smooth(method="lm", formula="y~x") +
labs(y = "attention_accuracy")## Warning: Removed 6 rows containing missing values (geom_segment).
# Attention accuracy as a function of per-trial reaction time (log x-axis),
# binned with bootstrapped CIs and overlaid with a linear fit.
attention %>%
ggplot(aes(x = reaction_time, y = accuracy)) +
# geom_point() +
stat_summary_bin(fun.data = mean_cl_boot, geom="pointrange", binwidth = 0.1) +
scale_x_log10() +
geom_smooth(method="lm", formula="y~x") +
labs(y = "Attention accuracy")## Warning: Removed 5 rows containing missing values (geom_segment).
# Per-participant critical accuracy vs mean passage reading time (log x-axis),
# colored by exclusion status. FIX: se=FALSE instead of the reassignable
# alias F.
critical %>%
group_by(participant_id, excluded.attention) %>%
summarize(
passage_reading_time = mean(passage_reading_time),
accuracy = mean(accuracy),
.groups="drop"
) %>%
ggplot(aes(x = passage_reading_time, y = accuracy, color=excluded.attention)) +
# geom_point() +
stat_summary_bin(fun.data = mean_cl_boot, geom="pointrange", binwidth = 0.2) +
scale_x_log10() +
geom_smooth(method="lm", formula="y~x", se=FALSE) +
labs(y = "critical accuracy") +
scale_color_manual(values=c("#009933", "#FF0000"))## Warning: Removed 5 rows containing missing values (geom_segment).
# Critical-trial accuracy vs reaction time (log x-axis), colored by exclusion
# status. FIX: se=FALSE instead of the reassignable alias F.
critical %>%
ggplot(aes(x = reaction_time, y = accuracy, color=excluded.attention)) +
# geom_point() +
stat_summary_bin(fun.data = mean_cl_boot, geom="pointrange", binwidth = 0.2) +
scale_x_log10() +
geom_smooth(method="lm", formula="y~x", se=FALSE) +
labs(y = "critical accuracy") +
scale_color_manual(values=c("#009933", "#FF0000"))## Warning: Removed 2 rows containing missing values (geom_segment).
Overall GPT-3 accuracy was 0.61.
# GPT-3 accuracy by condition: bar plot plus the per-condition means/counts.
df_fb_gpt3_dv %>%
ggplot(aes(x = condition, y = mdl.accuracy, fill=condition)) +
stat_summary(fun="mean", geom="bar")df_fb_gpt3_dv %>%
group_by(condition) %>%
summarize(accuracy=mean(mdl.accuracy), n=n(), .groups="drop")| condition | accuracy | n |
|---|---|---|
| False Belief | 0.4895833 | 96 |
| True Belief | 0.7291667 | 96 |
First, we ask whether condition predicts response, above and beyond the other covariates excluding log_odds from GPT-3.
Descriptively, we can ask whether a higher proportion of people respond with the START location in the FB or TB condition.
# Proportion of START-location responses by condition x knowledge cue.
df_merged %>%
group_by(condition, knowledge_cue) %>%
summarise(prop_start = mean(is_start),
count = n(),
.groups="drop")| condition | knowledge_cue | prop_start | count |
|---|---|---|---|
| False Belief | Explicit | 0.9625000 | 160 |
| False Belief | Implicit | 0.8255034 | 149 |
| True Belief | Explicit | 0.2974684 | 158 |
| True Belief | Implicit | 0.1849315 | 146 |
# P(START) by condition, faceted by knowledge cue (pointranges are mean +/-
# 2 SEM), followed by the mixed logistic model with all predictors except the
# GPT-3 log-odds. FIX: use <- for top-level assignment, not =.
df_merged %>%
ggplot(aes(x = condition,
y = is_start_numeric,
color = condition)) +
# geom_jitter(alpha = .1) +
stat_summary (fun = function(x){mean(x)},
fun.min = function(x){mean(x) - 2*sd(x)/sqrt(length(x))},
fun.max = function(x){mean(x) + 2*sd(x)/sqrt(length(x))},
geom= 'pointrange',
position=position_dodge(width=0.95)) +
labs(x = "Condition",
y = "P(START)") +
scale_color_viridis_d() +
theme_bw() +
facet_wrap(~knowledge_cue,
nrow = 2)model_all_but_lo <- glmer(
is_start_numeric ~ condition + knowledge_cue+
recent_mention +
first_mention +
(1 + condition | item),
data = df_merged,
control=glmerControl(optimizer="bobyqa"),
family = binomial())
# Same model as model_all_but_lo but with condition removed, for the nested
# model comparison below. FIX: use <- for top-level assignment, not =.
model_all_but_lo_and_condition <- glmer(
is_start_numeric ~ knowledge_cue+
recent_mention +
first_mention +
(1 + condition | item),
data = df_merged,
control=glmerControl(optimizer="bobyqa"),
family = binomial())## boundary (singular) fit: see ?isSingular
| npar | AIC | BIC | logLik | deviance | Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|---|---|---|---|---|
| model_all_but_lo_and_condition | 7 | 567.8665 | 598.7950 | -276.9332 | 553.8665 | NA | NA | NA |
| model_all_but_lo | 8 | 538.1358 | 573.4827 | -261.0679 | 522.1358 | 31.7307 | 1 | 0 |
There is a significant effect of condition when accounting for log-odds.
# Full fixed-effects model, now including the GPT-3 log-odds covariate.
# FIX: use <- for top-level assignment, not =.
model_all_fe <- glmer(data = df_merged,
is_start_numeric ~ condition + knowledge_cue + log_odds +
recent_mention +
first_mention +
(1 + condition| item),
control=glmerControl(optimizer="bobyqa"),
family = binomial())## boundary (singular) fit: see ?isSingular
# Same as model_all_fe but without the condition fixed effect, for the
# nested comparison. FIX: use <- for top-level assignment, not =.
model_no_condition <- glmer(data = df_merged,
is_start_numeric ~ knowledge_cue + log_odds +
recent_mention +
first_mention +
(1 + condition| item),
control=glmerControl(optimizer="bobyqa"),
family = binomial())## boundary (singular) fit: see ?isSingular
| npar | AIC | BIC | logLik | deviance | Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|---|---|---|---|---|
| model_no_condition | 8 | 567.0507 | 602.3977 | -275.5254 | 551.0507 | NA | NA | NA |
| model_all_fe | 9 | 538.6673 | 578.4326 | -260.3337 | 520.6673 | 30.38342 | 1 | 0 |
The full model shows a significant effect only for condition.
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## is_start_numeric ~ condition + knowledge_cue + log_odds + recent_mention +
## first_mention + (1 + condition | item)
## Data: df_merged
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 538.7 578.4 -260.3 520.7 604
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.2014 -0.5026 0.2214 0.3615 2.8111
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## item (Intercept) 0.2557 0.5057
## conditionTrue Belief 0.3142 0.5605 -1.00
## Number of obs: 613, groups: item, 12
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.452473 0.358776 6.836 8.16e-12 ***
## conditionTrue Belief -3.406383 0.330078 -10.320 < 2e-16 ***
## knowledge_cueImplicit -0.607752 0.354839 -1.713 0.0868 .
## log_odds 0.131102 0.108489 1.208 0.2269
## recent_mentionStart 0.290555 0.226317 1.284 0.1992
## first_mentionStart 0.002604 0.224210 0.012 0.9907
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtTB knwl_I lg_dds rcnt_S
## condtnTrBlf -0.764
## knwldg_cImp -0.562 0.399
## log_odds -0.370 0.342 0.754
## rcnt_mntnSt -0.233 -0.141 -0.068 -0.115
## frst_mntnSt -0.252 -0.067 -0.108 -0.106 0.040
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
The effect of LO approaches significance in the no_condition model.
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: is_start_numeric ~ knowledge_cue + log_odds + recent_mention +
## first_mention + (1 + condition | item)
## Data: df_merged
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 567.1 602.4 -275.5 551.1 605
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.2428 -0.5245 0.2028 0.3891 2.9199
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## item (Intercept) 8.602 2.933
## conditionTrue Belief 13.754 3.709 -1.00
## Number of obs: 613, groups: item, 12
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.201655 0.430617 -0.468 0.6396
## knowledge_cueImplicit -0.446546 0.357413 -1.249 0.2115
## log_odds 0.185081 0.108706 1.703 0.0886 .
## recent_mentionStart 0.255310 0.220988 1.155 0.2480
## first_mentionStart 0.002058 0.220038 0.009 0.9925
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) knwl_I lg_dds rcnt_S
## knwldg_cImp -0.332
## log_odds -0.226 0.774
## rcnt_mntnSt -0.268 -0.065 -0.105
## frst_mntnSt -0.262 -0.096 -0.089 0.045
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
We can visualize this in a couple of ways. First, we can look at the residuals of a model without condition, and ask whether they are correlated with condition.
# Residuals of the no-condition model, plotted against condition; a
# condition-dependent pattern means condition explains variance the other
# predictors do not. FIX: use <- for assignment, not =.
df_merged$resid <- residuals(model_no_condition)
df_merged %>%
ggplot(aes(x = condition,
y = resid,
color = condition)) +
geom_jitter(alpha = .3) +
stat_summary (fun = function(x){mean(x)},
fun.min = function(x){mean(x) - 2*sd(x)/sqrt(length(x))},
fun.max = function(x){mean(x) + 2*sd(x)/sqrt(length(x))},
geom= 'pointrange',
position=position_dodge(width=0.95)) +
labs(x = "Condition",
y = "Residuals") +
geom_hline(yintercept = 0, linetype = "dotted") +
scale_color_viridis_d() +
theme_bw()Residuals are bimodal for all items in TB, and almost all items in FB.
# Same residual plot, faceted by item. The resid column is recomputed so the
# chunk runs standalone. FIX: use <- for assignment, not =.
df_merged$resid <- residuals(model_no_condition)
df_merged %>%
ggplot(aes(x = condition,
y = resid,
color = condition)) +
geom_jitter(alpha = .3) +
stat_summary (fun = function(x){mean(x)},
fun.min = function(x){mean(x) - 2*sd(x)/sqrt(length(x))},
fun.max = function(x){mean(x) + 2*sd(x)/sqrt(length(x))},
geom= 'pointrange',
position=position_dodge(width=0.95)) +
labs(x = "Condition",
y = "Residuals") +
geom_hline(yintercept = 0, linetype = "dotted") +
scale_color_viridis_d() +
theme_bw() +
# facet_grid(rows=vars(knowledge_cue), cols=vars(first_mention)) +
facet_wrap(facets=vars(item))Residuals are also bimodal in all intersections of first mention, recent mention, and knowledge cue, although they seem to be less bimodal within false belief for kc:implicit, first_mention:end, and recent_mention:start.
# Residuals by condition, colored by knowledge cue and faceted by
# recent x first mention. FIX: use <- for assignment, not =.
df_merged$resid <- residuals(model_no_condition)
df_merged %>%
ggplot(aes(x = condition,
y = resid,
color = knowledge_cue)) +
geom_jitter(alpha = .5) +
stat_summary (fun = function(x){mean(x)},
fun.min = function(x){mean(x) - 2*sd(x)/sqrt(length(x))},
fun.max = function(x){mean(x) + 2*sd(x)/sqrt(length(x))},
geom= 'pointrange',
position=position_dodge(width=0.95)) +
labs(x = "Condition",
y = "Residuals") +
geom_hline(yintercept = 0, linetype = "dotted") +
# scale_color_viridis_d() +
theme_bw() +
facet_grid(rows=vars(recent_mention), cols=vars(first_mention), labeller=label_both)Another approach is to bin log-odds, and look at whether the probability of choosing the START location changes as a function of both binned log-odds and condition.
# P(START) as a function of binned GPT-3 log-odds (deciles) and condition.
# BUG FIX: the y-axis was mislabeled "Residuals" -- the y aesthetic is
# is_start_numeric, so the label is P(START), consistent with the
# surrounding text and the earlier P(START) plot.
df_merged %>%
mutate(binned_lo = ntile(log_odds, n = 10)) %>%
ggplot(aes(x = binned_lo,
y = is_start_numeric,
color = condition)) +
stat_summary (fun = function(x){mean(x)},
fun.min = function(x){mean(x) - 2*sd(x)/sqrt(length(x))},
fun.max = function(x){mean(x) + 2*sd(x)/sqrt(length(x))},
geom= 'pointrange',
position=position_dodge(width=0.95)) +
geom_smooth() +
labs(x = "Binned Log-odds",
y = "P(START)",
color = "Condition") +
scale_color_viridis_d() +
theme_bw() ## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
# Refit of both reduced models with intercept-only random effects (the
# (1 + condition | item) fits above were singular). Note these reassign
# the earlier model objects. FIX: use <- for top-level assignment, not =.
model_no_condition <- glmer(data = df_merged,
is_start_numeric ~ knowledge_cue + log_odds +
recent_mention +
first_mention +
(1 | item),
# control=glmerControl(optimizer="bobyqa"),
family = binomial())
model_all_but_lo_and_condition <- glmer(
is_start_numeric ~ knowledge_cue+
recent_mention +
first_mention +
(1 | item),
data = df_merged,
control=glmerControl(optimizer="bobyqa"),
family = binomial())
anova(model_no_condition, model_all_but_lo_and_condition)| npar | AIC | BIC | logLik | deviance | Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|---|---|---|---|---|
| model_all_but_lo_and_condition | 5 | 836.6595 | 858.7513 | -413.3298 | 826.6595 | NA | NA | NA |
| model_no_condition | 6 | 740.9514 | 767.4616 | -364.4757 | 728.9514 | 97.7081 | 1 | 0 |
# Interaction model (condition x knowledge cue) and the matching additive
# model for the nested comparison. FIX: use <- for top-level assignment,
# not =.
model_all_fe_ixn <- glmer(data = df_merged,
is_start_numeric ~ condition * knowledge_cue + log_odds +
recent_mention +
first_mention +
(1 + condition| item),
# control=glmerControl(optimizer="bobyqa"),
family = binomial())## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0522244 (tol = 0.002, component 1)
model_all_fe <- glmer(data = df_merged,
is_start_numeric ~ condition + knowledge_cue + log_odds +
recent_mention +
first_mention +
(1 + condition| item),
# control=glmerControl(optimizer="bobyqa"),
family = binomial())## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.0297018 (tol = 0.002, component 1)
| npar | AIC | BIC | logLik | deviance | Chisq | Df | Pr(>Chisq) | |
|---|---|---|---|---|---|---|---|---|
| model_all_fe | 9 | 538.6679 | 578.4331 | -260.3339 | 520.6679 | NA | NA | NA |
| model_all_fe_ixn | 10 | 536.9429 | 581.1265 | -258.4714 | 516.9429 | 3.724977 | 1 | 0.0536044 |
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## is_start_numeric ~ condition * knowledge_cue + log_odds + recent_mention +
## first_mention + (1 + condition | item)
## Data: df_merged
##
## AIC BIC logLik deviance df.resid
## 536.9 581.1 -258.5 516.9 603
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -6.6022 -0.5413 0.1695 0.3979 2.4964
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## item (Intercept) 0.2601 0.5100
## conditionTrue Belief 0.3133 0.5597 -1.00
## Number of obs: 613, groups: item, 12
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.01408 0.50604 5.956 2.58e-09
## conditionTrue Belief -4.08613 0.52178 -7.831 4.84e-15
## knowledge_cueImplicit -1.41533 0.57879 -2.445 0.0145
## log_odds 0.09548 0.11242 0.849 0.3957
## recent_mentionStart 0.31612 0.22682 1.394 0.1634
## first_mentionStart 0.01024 0.22395 0.046 0.9635
## conditionTrue Belief:knowledge_cueImplicit 1.02208 0.55148 1.853 0.0638
##
## (Intercept) ***
## conditionTrue Belief ***
## knowledge_cueImplicit *
## log_odds
## recent_mentionStart
## first_mentionStart
## conditionTrue Belief:knowledge_cueImplicit .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtTB knwl_I lg_dds rcnt_S frst_S
## condtnTrBlf -0.883
## knwldg_cImp -0.798 0.772
## log_odds -0.363 0.337 0.587
## rcnt_mntnSt -0.125 -0.135 -0.091 -0.134
## frst_mntnSt -0.169 -0.054 -0.078 -0.101 0.039
## cndtnTBl:_I 0.672 -0.766 -0.784 -0.164 0.059 0.020
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0522244 (tol = 0.002, component 1)
# Counts and proportions of self-reported dyslexia/ADHD/ASD combinations
# across all participants. (Props sum to ~0.99 due to rounding.)
ppts.all %>%
group_by(dyslexia, adhd, asd) %>%
summarize(n = n(),
prop = round(n / nrow(ppts.all), 2),
.groups="drop")| dyslexia | adhd | asd | n | prop |
|---|---|---|---|---|
| False | False | False | 822 | 0.71 |
| False | False | True | 39 | 0.03 |
| False | True | False | 149 | 0.13 |
| False | True | True | 12 | 0.01 |
| True | False | False | 86 | 0.07 |
| True | True | False | 16 | 0.01 |
| True | True | True | 32 | 0.03 |
# Bar plot of the diagnosis combinations, then merge participant metadata
# into df_merged and tabulate accuracy per combination. FIX: all.y = FALSE
# instead of the reassignable alias F.
ppts.all %>%
group_by(dyslexia, adhd, asd) %>%
summarize(n = n(),
prop = round(n / nrow(ppts.all), 2),
.groups="drop") %>%
ggplot(aes(x = dyslexia, y = n, fill=adhd)) +
geom_bar(stat="identity", position="dodge") +
facet_grid(cols=vars(asd), labeller="label_both") +
theme_minimal()df_merged <- df_merged %>%
merge(ppts.all, by.y="id", by.x="participant_id", all.y = FALSE)
df_merged %>%
group_by(dyslexia, adhd, asd) %>%
summarize(n = n(),
correct = sum(accuracy),
accuracy = mean(accuracy),
.groups="drop")| dyslexia | adhd | asd | n | correct | accuracy |
|---|---|---|---|---|---|
| False | False | False | 530 | 452 | 0.8528302 |
| False | False | True | 10 | 6 | 0.6000000 |
| False | True | False | 49 | 31 | 0.6326531 |
| False | True | True | 4 | 3 | 0.7500000 |
| True | False | False | 16 | 11 | 0.6875000 |
| True | True | False | 1 | 1 | 1.0000000 |
| True | True | True | 3 | 3 | 1.0000000 |
Dyslexic participants perform worse.
# Merge demographics into the critical data, then plot accuracy by dyslexia,
# ADHD, ASD, gender, and age (log x-axis), split by exclusion status.
# FIX: se=FALSE instead of the reassignable alias F.
critical <- critical %>%
merge(ppts.all %>% select(participant_id, dyslexia, adhd, asd, age, gender))
critical %>%
ggplot(aes(x = dyslexia, y = accuracy, fill=excluded.attention)) +
stat_summary(fun="mean", geom="bar", position="dodge", alpha=0.8) +
stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(0.9), width=0.2) +
scale_fill_manual(values=c("#009933", "#FF0000"))As do ppts with ADHD
critical %>%
ggplot(aes(x = adhd, y = accuracy, fill=excluded.attention)) +
stat_summary(fun="mean", geom="bar", position="dodge", alpha=0.8) +
stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(0.9), width=0.2) +
scale_fill_manual(values=c("#009933", "#FF0000"))And ASD
critical %>%
ggplot(aes(x = asd, y = accuracy, fill=excluded.attention)) +
stat_summary(fun="mean", geom="bar", position="dodge", alpha=0.8) +
stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(0.9), width=0.2) +
scale_fill_manual(values=c("#009933", "#FF0000"))Men and women perform similarly.
critical %>%
ggplot(aes(x = gender, y = accuracy, fill=excluded.attention)) +
stat_summary(fun="mean", geom="bar", position="dodge", alpha=0.8) +
stat_summary(fun.data=mean_cl_boot, geom="errorbar", position=position_dodge(0.9), width=0.2) +
scale_fill_manual(values=c("#009933", "#FF0000"))Ppts get better with age.
critical %>%
# group_by(participant_id, excluded.attention) %>%
# summarize(
# age = mean(age),
# accuracy = mean(accuracy),
# .groups="drop"
# ) %>%
ggplot(aes(x = age, y = accuracy, color=excluded.attention)) +
# geom_point() +
stat_summary_bin(fun.data = mean_cl_boot, geom="pointrange", binwidth = 0.05) +
scale_x_log10() +
geom_smooth(method="lm", formula="y~x", se=FALSE) +
labs(y = "critical accuracy") +
scale_color_manual(values=c("#009933", "#FF0000"))## Warning: Removed 1 rows containing missing values (geom_segment).
Non-significant negative interaction of LO and ASD (b=-0.47, p=0.14)
# Full model plus an ASD main effect and its interaction with log-odds.
# FIX: use <- for top-level assignment, not =.
model.asd <- glmer(data = df_merged,
is_start_numeric ~ condition + knowledge_cue + log_odds +
recent_mention + first_mention + asd + asd:log_odds +
(1 + condition| item),
control=glmerControl(optimizer="bobyqa"),
family = binomial())## boundary (singular) fit: see ?isSingular
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## is_start_numeric ~ condition + knowledge_cue + log_odds + recent_mention +
## first_mention + asd + asd:log_odds + (1 + condition | item)
## Data: df_merged
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 535.1 583.7 -256.5 513.1 602
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.5049 -0.4946 0.2135 0.3675 2.8333
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## item (Intercept) 0.2689 0.5186
## conditionTrue Belief 0.3227 0.5681 -1.00
## Number of obs: 613, groups: item, 12
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.4336 0.3631 6.703 2.04e-11 ***
## conditionTrue Belief -3.4573 0.3334 -10.370 < 2e-16 ***
## knowledge_cueImplicit -0.5804 0.3606 -1.609 0.1075
## log_odds 0.1610 0.1110 1.451 0.1467
## recent_mentionStart 0.3182 0.2292 1.388 0.1650
## first_mentionStart -0.0122 0.2266 -0.054 0.9571
## asdTrue 1.2278 0.6622 1.854 0.0637 .
## log_odds:asdTrue -0.4727 0.3186 -1.484 0.1379
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtTB knwl_I lg_dds rcnt_S frst_S asdTru
## condtnTrBlf -0.761
## knwldg_cImp -0.562 0.398
## log_odds -0.355 0.320 0.746
## rcnt_mntnSt -0.229 -0.149 -0.071 -0.112
## frst_mntnSt -0.251 -0.063 -0.110 -0.105 0.042
## asdTrue -0.051 -0.022 0.048 0.012 0.014 -0.042
## lg_dds:sdTr -0.040 0.065 0.022 -0.127 -0.054 -0.012 0.242
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
Significant negative interaction of LO and Dyslexia (b=-0.66, p=0.03)
# Full model plus a dyslexia main effect and its interaction with log-odds.
# FIX: use <- for top-level assignment, not =.
model.dyslexia <- glmer(data = df_merged,
is_start_numeric ~ condition + knowledge_cue + log_odds +
recent_mention + first_mention + dyslexia + dyslexia:log_odds +
(1 + condition| item),
control=glmerControl(optimizer="bobyqa"),
family = binomial())## boundary (singular) fit: see ?isSingular
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## is_start_numeric ~ condition + knowledge_cue + log_odds + recent_mention +
## first_mention + dyslexia + dyslexia:log_odds + (1 + condition | item)
## Data: df_merged
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 538.2 586.8 -258.1 516.2 602
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.5915 -0.4968 0.2101 0.3713 2.9784
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## item (Intercept) 0.2461 0.4961
## conditionTrue Belief 0.2890 0.5376 -1.00
## Number of obs: 613, groups: item, 12
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.463011 0.361585 6.812 9.65e-12 ***
## conditionTrue Belief -3.438268 0.330080 -10.416 < 2e-16 ***
## knowledge_cueImplicit -0.596384 0.357874 -1.666 0.0956 .
## log_odds 0.161568 0.110467 1.463 0.1436
## recent_mentionStart 0.305069 0.227933 1.338 0.1808
## first_mentionStart 0.002187 0.225505 0.010 0.9923
## dyslexiaTrue 0.347216 0.628113 0.553 0.5804
## log_odds:dyslexiaTrue -0.656792 0.301552 -2.178 0.0294 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtTB knwl_I lg_dds rcnt_S frst_S dyslxT
## condtnTrBlf -0.761
## knwldg_cImp -0.564 0.403
## log_odds -0.352 0.323 0.744
## rcnt_mntnSt -0.230 -0.150 -0.069 -0.114
## frst_mntnSt -0.250 -0.070 -0.111 -0.107 0.049
## dyslexiaTru -0.125 0.052 0.084 0.037 0.035 -0.025
## lg_dds:dysT -0.058 0.091 -0.008 -0.156 -0.032 -0.004 -0.133
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
NS positive interaction of LO and ADHD (b=0.34, p=0.3)
# Same mixed-effects logistic specification as the dyslexia model, but
# testing self-reported ADHD and its interaction with GPT-3 log_odds.
# Random intercepts and condition slopes vary by item.
model.adhd <- glmer(
  is_start_numeric ~ condition + knowledge_cue + log_odds +
    recent_mention + first_mention + adhd + adhd:log_odds +
    (1 + condition | item),
  data = df_merged,
  family = binomial(),
  # bobyqa tends to converge better than the default for these fits
  control = glmerControl(optimizer = "bobyqa")
)
## boundary (singular) fit: see ?isSingular
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## is_start_numeric ~ condition + knowledge_cue + log_odds + recent_mention +
## first_mention + adhd + adhd:log_odds + (1 + condition | item)
## Data: df_merged
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 538.0 586.6 -258.0 516.0 602
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -15.2962 -0.4949 0.2172 0.3785 2.7508
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## item (Intercept) 0.2899 0.5384
## conditionTrue Belief 0.3165 0.5626 -1.00
## Number of obs: 613, groups: item, 12
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.34737 0.36430 6.443 1.17e-10 ***
## conditionTrue Belief -3.39992 0.33114 -10.267 < 2e-16 ***
## knowledge_cueImplicit -0.56749 0.35671 -1.591 0.1116
## log_odds 0.11531 0.11118 1.037 0.2997
## recent_mentionStart 0.29331 0.22737 1.290 0.1970
## first_mentionStart 0.03295 0.22613 0.146 0.8841
## adhdTrue 0.90818 0.46778 1.941 0.0522 .
## log_odds:adhdTrue 0.33972 0.32831 1.035 0.3008
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cndtTB knwl_I lg_dds rcnt_S frst_S adhdTr
## condtnTrBlf -0.758
## knwldg_cImp -0.556 0.395
## log_odds -0.356 0.336 0.745
## rcnt_mntnSt -0.231 -0.140 -0.071 -0.119
## frst_mntnSt -0.249 -0.078 -0.104 -0.104 0.040
## adhdTrue -0.104 -0.033 0.053 -0.003 0.000 0.070
## lg_dds:dhdT -0.039 0.002 0.017 -0.135 0.008 0.043 0.376
## optimizer (bobyqa) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
# Item-level summary: for each item x condition cell, compute the human
# proportion of "Start" responses, the mean GPT-3 log-odds (also converted
# to a probability via the inverse logit), and mean human accuracy.
df_merged_summ <- df_merged %>%
  mutate(p_start_cond = 1 / (1 + exp(-log_odds))) %>%  # inverse logit
  group_by(item_id, condition, knowledge_cue, recent_mention, first_mention) %>%
  summarise(
    prop_start = mean(is_start),
    lo = mean(log_odds),
    accuracy = mean(is_correct),
    p_start_gpt3 = mean(p_start_cond),
    .groups = "drop"
  )

# Density of P(Start) for humans vs. GPT-3, one facet per metric,
# split by condition (knowledge state). Dotted line marks chance (0.5).
df_merged_summ %>%
  mutate(
    "GPT-3\n(Proportion)" = p_start_gpt3,
    "Human\n(Proportion)" = prop_start
  ) %>%
  pivot_longer(
    cols = c("Human\n(Proportion)", "GPT-3\n(Proportion)"),
    names_to = "metric",
    values_to = "value"
  ) %>%
  ggplot(aes(x = value, fill = condition)) +
  geom_density(alpha = .5, color = "#666666") +
  theme_minimal() +
  facet_wrap(. ~ metric,
             # scales = "free",
             ncol = 1,
             strip.position = "left") +
  geom_vline(xintercept = .5, linetype = "dotted") +
  theme(legend.position = "bottom") +
  scale_y_continuous(position = "right") +
  labs(
    fill = "Knowledge State",
    x = "P(Start)",
    y = "Density"
  ) +
  theme(
    axis.title = element_text(size = rel(2)),
    axis.text = element_text(size = rel(2)),
    legend.text = element_text(size = rel(2)),
    legend.title = element_text(size = rel(2)),
    strip.text.y = element_text(size = rel(2))
  )

# Human accuracy as a function of GPT-3's sign-corrected log-odds.
# "Start" is correct only in the False Belief condition, so the log-odds is
# flipped elsewhere to read as correct-minus-incorrect in both conditions.
df_merged_summ %>%
  mutate(
    lo.correct = case_when(
      condition == "False Belief" ~ lo,
      TRUE ~ -1 * lo  # TRUE, not the reassignable alias T
    )
  ) %>%
  ggplot(aes(x = lo.correct, y = accuracy, color = condition, fill = condition)) +
  geom_point(position = position_jitter(height = 0.01), alpha = 0.75) +
  geom_smooth(method = "lm", formula = y ~ x, alpha = 0.15) +
  theme_minimal() +
  labs(
    y = "Human Accuracy",
    x = "GPT-3 Log-odds Ratio (Correct - Incorrect)",
    fill = "Knowledge State",
    color = "Knowledge State"
  ) +
  theme(legend.position = "top")

# Marginal R^2 (variance explained by fixed effects only) for the nested
# model comparison; [1] extracts the marginal (not conditional) component.
r2 <- c(
  r.squaredGLMM(model_all_but_lo_and_condition)[1],
  r.squaredGLMM(model_no_condition)[1],
  r.squaredGLMM(model_all_but_lo)[1],
  r.squaredGLMM(model_all_fe)[1]
)
## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help page.
## Warning: The null model is correct only if all variables used by the original
## model remain unchanged.
## Warning: The null model is correct only if all variables used by the original
## model remain unchanged.
## Warning: The null model is correct only if all variables used by the original
## model remain unchanged.
## Warning: The null model is correct only if all variables used by the original
## model remain unchanged.
# Labels in the same order as the models above.
model <- c(
  "Base",
  "Base + GPT-3",
  "Base + Condition",
  "Base + GPT-3 + Condition"
)
df.r2 <- data.frame(model, r2)

# Horizontal bar chart of marginal R^2 by model, largest at top.
df.r2 %>%
  ggplot(aes(x = r2, y = reorder(model, -r2))) +
  geom_col(fill = "#69c8ff") +  # geom_col() == geom_bar(stat = "identity")
  theme_minimal() +
  labs(
    x = bquote("Marginal" ~ R^2 ~ ""),
    y = "Predictors"
  )